Show Code
# Load packages
library(tidyverse)
library(tidytuesdayR)
library(here)
# Load helper functions
# source() runs the shared helper script; here::here() resolves the path
# relative to the project root rather than the current working directory.
source(here::here("R/utils/tidy_tuesday_helpers.R"))
# Set project title
# NOTE(review): `title` is not referenced in the visible code; presumably it
# is consumed by the helpers or by a later section. Confirm.
title <- "Project Gutenberg"
tt_date <- "2025-06-03"
Steven Villalon
June 3, 2025
How many books in Project Gutenberg have a Latin American country as one of their subjects?
Goal: make a map with the number of books as a label.
# Download the Tidy Tuesday release identified by `tt_date`
tuesdata <- tidytuesdayR::tt_load(tt_date)

# Give each dataset in the download its own top-level object
gutenberg_authors <- tuesdata[["gutenberg_authors"]]
gutenberg_languages <- tuesdata[["gutenberg_languages"]]
gutenberg_metadata <- tuesdata[["gutenberg_metadata"]]
gutenberg_subjects <- tuesdata[["gutenberg_subjects"]]

# The combined download is not needed once its pieces have been extracted
rm(tuesdata)
# List of Latin American countries
# Countries are grouped by region for readability only; unlist() flattens the
# groups, in order, into one plain (unnamed) character vector.
latam_caribbean_countries <- unlist(
  list(
    south_america = c(
      "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Ecuador",
      "Guyana", "Paraguay", "Peru", "Suriname", "Uruguay", "Venezuela"
    ),
    central_america = c(
      "Costa Rica", "El Salvador", "Guatemala", "Honduras", "Nicaragua",
      "Panama"
    ),
    caribbean = c("Cuba", "Dominican Republic", "Puerto Rico"),
    # Spanish-speaking North America
    north_america = "Mexico"
  ),
  use.names = FALSE
)
# Initialize an empty list to collect rows
# Collect one single-row data frame per country, keyed by country name.
# `pg_clean` is not defined in the visible section; presumably it is built
# earlier from the Gutenberg tables with one row per book. Confirm.
rows <- list()

# Loop over each country and count the rows whose `subjects` mention it.
# NOTE(review): fixed() is a case-sensitive literal substring match, so
# "Mexico" also matches "New Mexico" and "Peru" also matches "Peruvian".
# Confirm that this over-matching is acceptable for the counts shown.
for (country in latam_caribbean_countries) {
  # str_detect() yields NA for a missing subject string; na.rm = TRUE stops a
  # single NA from turning the whole country total into NA.
  count <- sum(str_detect(pg_clean$subjects, fixed(country)), na.rm = TRUE)
  rows[[country]] <- data.frame(
    country = country,
    book_count = count
  )
}
# Stack the per-country rows into one data frame, bucket the counts into three
# ordered bands, and list the countries with the most books first.
cnts_by_country <- rows |>
  bind_rows() |>
  mutate(
    # Left-closed intervals: below 50, 50 up to (not including) 100, 100 and up
    cnt_group = cut(
      book_count,
      breaks = c(-Inf, 50, 100, Inf),
      labels = c("0 - 49", "50 - 99", "100+"),
      right = FALSE,
      ordered_result = TRUE
    )
  ) |>
  arrange(desc(book_count))
cnts_by_country
Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
# Set lat/lon parameters for plotting area (used as the map's x/y limits)
long_min <- -125
long_max <- -30
lat_min <- -60
lat_max <- 35

# Load country shapes as an sf data frame.
# sf / rnaturalearth calls are namespace-qualified because no library() call
# for either package is visible in this section.
world <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf")

# inner_join() silently drops every country whose spelling differs between the
# two tables, so report mismatches instead of losing them unnoticed.
# NOTE(review): Natural Earth may abbreviate some `name` values (for example
# "Dominican Republic"); check the warning output and reconcile if needed.
unmatched <- setdiff(cnts_by_country$country, world$name)
if (length(unmatched) > 0) {
  warning(
    "No map shape found for: ", paste(unmatched, collapse = ", "),
    call. = FALSE
  )
}

# Join country shapes to cnts_by_country (keeps only countries found in both)
world_counts <- world |>
  inner_join(cnts_by_country, by = c("name" = "country"))

# Add a label anchor point on each country's surface and expose its
# coordinates as plain numeric columns for the label layer.
world_counts <- world_counts |>
  mutate(
    # Warnings from st_point_on_surface() are deliberately silenced here;
    # presumably the longitude/latitude accuracy warning. Confirm.
    label_point = suppressWarnings(sf::st_point_on_surface(geometry)),
    lon = sf::st_coordinates(label_point)[, 1],
    lat = sf::st_coordinates(label_point)[, 2]
  )
library(ggrepel)
library(showtext)
library(ggtext)
# Register the Google font "Lato" under the family name "lato"
font_add_google(name = "Lato", family = "lato")
# Turn on showtext rendering and set its resolution to 300 dpi
showtext_auto(enable = TRUE)
showtext_opts(dpi = 300)
# Make plot: the joined country shapes on a black canvas, with one repelled
# label per country showing its name and book count, filled by count band.
final_plot <-
  ggplot(world_counts) +
  geom_sf(color = "gray40") +
  # Crop the view to the bounding box set by the lat/lon parameters
  coord_sf(
    xlim = c(long_min, long_max),
    ylim = c(lat_min, lat_max)
  ) +
  geom_label_repel(
    # Only countries with at least 10 books get a label
    data = world_counts |> filter(book_count >= 10),
    aes(
      x = lon,
      y = lat,
      # paste0() so that no stray spaces surround the line break
      label = paste0(name, "\n", book_count),
      fill = cnt_group
    ),
    family = "lato",
    size = 3,
    alpha = 0.9,
    fontface = "bold",
    label.size = 0.2,
    max.overlaps = 30
  ) +
  # One fill colour per count band
  scale_fill_manual(values = c(
    "0 - 49" = "white",
    "50 - 99" = "#E6C36D",
    "100+" = "#A8C7A1"
  )) +
  labs(
    title = "How many books in the Gutenberg online \nlibrary are about Latin America?",
    subtitle = "Mexico was the sure bet to have the most books, but surprised to see so few for Puerto Rico, \nColombia, and Venezuela. Books in the library are at least 95 years old and are not currently \nunder copyright.",
    caption = "Chart produced by Steven Villalon for Tidy Tuesday exercise on June 3, 2025.",
    fill = "Book Counts"
  ) +
  theme_minimal(base_family = "lato") +
  theme(
    # Black canvas everywhere, including the legend
    plot.background = element_rect(fill = "black", color = NA),
    panel.background = element_rect(fill = "black", color = NA),
    legend.background = element_rect(fill = "black", color = NA),
    legend.key = element_rect(fill = "black", color = NA),
    # Place the legend inside the panel, left of centre
    legend.position = "inside",
    legend.position.inside = c(0.15, 0.5),
    legend.justification = c("left", "center"),
    text = element_text(color = "white"),
    # Hide axes and grid lines
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    plot.caption = element_text(color = "white", hjust = 0),
    plot.title = element_text(color = "#E8A6A1", face = "bold", size = 20),
    plot.subtitle = element_text(color = "gray90", size = 10)
  ) +
  guides(fill = guide_legend(override.aes = list(label = ""))) # Remove "a" from legend